# Packages this notebook depends on. unique() drops accidental duplicate
# entries (the original list named "twitteR" twice) while preserving the
# attach order, which determines which package masks which.
packages <- unique(c(
  "rmarkdown", "plyr", "dplyr", "ggplot2", "readr", "tidyr", "stringr",
  "knitr", "sparklyr", "shiny", "data.table", "zoo", "fasttime", "twitteR",
  "openssl", "httpuv", "rvest", "httr", "jsonlite", "lubridate", "twitteR",
  "magrittr"
))

# Attach every package, installing it (with dependencies) first when it is not
# yet available. require() is used here only as an availability test: it
# returns FALSE instead of raising an error when the package cannot be loaded.
# library() then raises an error if the installation did not succeed.
package.check <- lapply(packages, FUN = function(pkg) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
    library(pkg, character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE)
  }
})
Loading required package: rmarkdown
Loading required package: plyr
Loading required package: dplyr
package ‘dplyr’ was built under R version 4.0.5
Attaching package: ‘dplyr’
The following objects are masked from ‘package:plyr’:
arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Loading required package: ggplot2
Loading required package: readr
Loading required package: tidyr
Loading required package: stringr
Loading required package: knitr
Loading required package: sparklyr
package ‘sparklyr’ was built under R version 4.0.5
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘sparklyr’
The following object is masked from ‘package:plyr’:
mutate
The following object is masked from ‘package:stats’:
filter
Loading required package: shiny
Loading required package: data.table
Registered S3 method overwritten by 'data.table':
method from
print.data.table
data.table 1.14.0 using 6 threads (see ?getDTthreads). Latest news: r-datatable.com
Attaching package: ‘data.table’
The following objects are masked from ‘package:dplyr’:
between, first, last
Loading required package: zoo
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
Loading required package: fasttime
Loading required package: twitteR
package ‘twitteR’ was built under R version 4.0.5
Attaching package: ‘twitteR’
The following objects are masked from ‘package:dplyr’:
id, location
The following object is masked from ‘package:plyr’:
id
Loading required package: openssl
Linking to: OpenSSL 1.1.1g 21 Apr 2020
Loading required package: httpuv
Loading required package: rvest
Loading required package: xml2
Attaching package: ‘rvest’
The following object is masked from ‘package:readr’:
guess_encoding
Loading required package: httr
Loading required package: jsonlite
Attaching package: ‘jsonlite’
The following object is masked from ‘package:shiny’:
validate
Loading required package: lubridate
Attaching package: ‘lubridate’
The following objects are masked from ‘package:data.table’:
hour, isoweek, mday, minute, month, quarter, second, wday, week, yday, year
The following objects are masked from ‘package:base’:
date, intersect, setdiff, union
Loading required package: magrittr
Attaching package: ‘magrittr’
The following object is masked from ‘package:tidyr’:
extract
# Restore the helper functions that were serialised to "2. Data" as .rds files.
nature_html_parser <- readr::read_rds(
  here::here("2. Data", paste0("001", "nature_html_parser.rds"))
)
cmapply <- readr::read_rds(
  here::here("2. Data", paste0("001", "cmapply.rds"))
)

# Load the stored journal codes and drop the three that are excluded here.
journal_code_vector <- readr::read_rds(
  here::here("2. Data", paste0("001", "_journal_code_vector.rds"))
)
journal_code_vector <- journal_code_vector[
  !journal_code_vector %in% c("nsmb", "nataging", "natastron")
]

# NOTE(review): this assignment replaces the filtered vector built above with
# a fixed list of nine journal codes, so the stored codes are not actually
# used from here on. Presumably a deliberate restriction -- confirm.
journal_code_vector <- c(
  "ncb", "nchem", "ngeo", "ni", "nm", "nmeth", "nnano", "nphoton", "nplants"
)
# Scrape one journal ("natastron") on its own and time the run with tictoc.
tictoc::tic()
# Get the articles by calling nature_html_parser through cmapply over
# issues 1-12, child nodes 1-15 and volumes 1-50 (presumably every
# combination of the three -- confirm against cmapply's definition).
# It is not as efficient as it could be, but the run web-scrapes almost
# 30 years of article titles.
article_combination_list <- cmapply(
  issue = 1:12,
  child_node = 1:15,
  subjournal = "natastron",
  volume = 1:50,
  FUN = nature_html_parser
)
tictoc::toc()
# c(5421.13, 5193.33) -- presumably elapsed seconds of earlier runs.
article_combination_list

# Leftover manual spot check, disabled: it passes an empty `volume` argument,
# which R treats as missing. Unless nature_html_parser defines a default for
# `volume` (not visible here), the call stops with a missing-argument error
# before the scraped results below are written to disk. Fill in a volume
# number before re-enabling it.
# nature_html_parser(subjournal = "nsmb", volume = , issue = 12, child_node = 7)

# Persist the scraped results next to the other data files.
write_rds(
  article_combination_list,
  here::here("2. Data", paste0(params$nb_id, "_natastron.rds"))
)
# Author's note kept from an earlier run (apparently a warning that was seen):
# closing unused connection 3 (https://www.nature.com/nprot/volumes/27/issues/7) 314 on 9/28

# Scrape every journal code in journal_code_vector and write one .rds file
# per journal, named "<params$nb_id>_<journal code>.rds".
for (variable in journal_code_vector) {
  # The journal code is passed as a plain value. The original wrapped it in
  # `{{ }}`, which is the tidy-eval embrace operator; outside a tidy-eval
  # function it is only two nested braces that evaluate to the same value.
  # NOTE(review): assumes cmapply evaluates its arguments normally -- confirm.
  article_list <- cmapply(
    issue = 1:12,
    child_node = 1:15,
    subjournal = variable,
    volume = 1:50,
    FUN = nature_html_parser
  )
  write_rds(
    article_list,
    here::here("2. Data", paste0(params$nb_id, "_", variable, ".rds"))
  )
}